#include <stdio.h>
#include <stdlib.h>
#include <string.h>



// Shortest stem considered when folding a probe on itself; bounds the fold
// centres SecondStruct() examines.
#define MATCH_NEEDED_IN_2ND 3

// Probe length in bases. Recompile when changed.
#define LEN_MER 8

int SecondStruct(const char *str);

int CrossHyb(const char *str1, const char *str2, int overlap_length);
int SimpleMatch(const char *str1, const char *str2);
char FirstN(const char *str1, const char *str2, int N);

// Extra score CrossHyb() adds for every matching g/g or c/c pair (-gc_add).
int GC_ADDITION = 1;
// Number of G or C bases every candidate probe must contain (-gc).
int NUM_GC = 4;

// Fold score at or above which a probe is rejected by SecondStruct() (-2).
int SCORE_NEEDED_IN_2ND = 7; // 1+2+4
// Optional extra pair of bases treated as equivalent when comparing probes (-eq).
char eq1, eq2;

// Result stream and log stream; main() redirects both when -o is given.
FILE *fp = stdout;
FILE *fplog = stderr;

main(int argc, char **argv) 
{ 

int ii, jj, comp_score, s; 

int MM[LEN_MER]; 

char line[256], str[LEN_MER+ 1 ] ; 

int gcsum, passage, total_probes, failed_fh, failed_ch, failed_sm; 
char convert [4]; // 0123 to atcg convertion. 
char *compatible; 

int max_prb, cnt_prb; 
char **probe; 

int max_snd, cnt_snd; 

char **sndstr; 

int *snd_matchcnt; 

int SIMPLE_CUTOFF = 5; // reject if this many bps match to each other, 

// no matter where they are located, 
int CROSSHYB_CUTOFF = 9; // 1+2+4 + 2 
int CROSSHYB_OVERLAP = 5; 

intFIRSTN = 4; 

eql=eq2 = "; 



if(argc = 1) 
{ 




fprintf(stderr, 

"Usage: %s -o output_file[stdout]\n", 

argv[0]); 
fprintf^stderr, 

n \t\t-gc nuraber_of_GCs_in_probe[%d]\n M , 

NUMJ3C); 
fprintf(stderr, 

"\t\t-2 secondary_structure_j*ej ect( including this value)[%d]\n'\ 

SCORE_NEEDEDJN_2ND); 
fprintf(stderr, 

"\t\t-ch crosshyb_reject(including this value)[%d]\n", 

CROSSHYB_CUTOFF); 
fprintf(stderr, 

M \t\t-sm simple_match_reject(including this value)[%d]\n M , 

SIMPLE_CUTOFF); 
fprintf^stderr, 

"\t\t-ol crosshyb_overlapJength[%d]\n", 

CROSSHYB_OVERLAP); 
fprintf(stderr» 

"\t\t-eq add'l^equiv^bp^in^ompa^checkingt 0 /^ 0 /^] (e.g., -eq gt)\n", 
eql,eq2); 

fprintf(stderr, "\t\t-fn first_NJength[%d]\n M , FIRST_N); 
fprintf(stderr, "\t\t-gc_add GC jidd^endty[%d]\n", GC_ADDITION); 

exit(0); 

} 

// parse input parameters. 
ii=l; 

while(ii < argc) 

{ 

if(strcmp(argv[ii], "-go") — 0) 
sscanf(argv[ii+l], "%d", &NUM_GC); 

else if(strcmp(argv[ii], "-2") = 0) 
sscanf(argv[ii+l], "%d", &SCORE_NEEDED_IN_2ND); 

else if(strcmp(argv[ii], M -ch") = 0) 
sscanf(argv[ii+l], "%d", &CROSSHYB.CUTOFF); 

else if(strcmp(argv[ii], "-ol") = 0) 
sscanf(argv[ii+l], "%d\ &CROSSHYB_OVERLAP); 

else if(strcmp(argv[ii], "-eq") — 0) 

' FIG. 16B 



if(strlen(argv[ii+l]) = 2 1| 
(strlen(argv[ii+l]) = 3 && argv[ii+l][2] = W)} 

{ 

eql = argv[ii+l][0]; 
eq2 = argv[ii+l][l]; 

} 

else 

^ fprintf(stderr, "\nERROR: Invalid string after -eq flag.\n\n"); 
exit(l); 

} 

} 

else if(strcmp(argv[ii], "-o") = 0) 

( if((fp = fopen(argv[ii+l], "w")) — NULL) 

^fprintfitstderr, "Can't open file %s to write.\n M t argv[ii+l]); 
exit(l); 

} 

char logname[128]; 

sprintf(logname, "%s.log", argv[ii+l]); 

if((fplog = fopen(logname, V)) = NULL) 

fprintf(stderr, "failed creating log. stderr usedAn"); 
fplog = stderr; 

} 

} 

else if(strcmp(argv[ii], "-fin") = 0) 

^ sscanf(argv[ii+l], "%d", &FIRST_N); 
} 

else if(strcmp(argv[ii], "-sm") = 0) 

{ sscanf(argv[ii+l], "%d", &SIMPLE_CUTOFF); 
} 

else if(strcmp(aEgv[ii], "-gc_add") = 0) 

{ sscanf(argv[ii+l], "%d", &GC_ADDITION); 
} 
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else 
{ 



fprintf(stderr, "Unknow flag %s\n M , argv[ii]); 
exit(l); 

} 

ii+=2; 

} 

max_prb = 30000; 
probe = new char* [max_prb]; 
for(ii = 0; ii < max_prb; ii++) 
probe[ii] = new char [LEN_MER+1]; 
cnt_prb = 0; 

max_snd = 5000; 

sndstr = new char* [max_snd]; 

for(ii = 0; ii < max_snd; ii++) 

sndstr[ii] = new char [LEN_MER+1]; 
snd_matchcnt = new int [max_snd]; 
cnt_snd = 0; 

// build an array of probes. Each probe is of length LEN_MER, 
// of which ■NUM_GC are Gs or Cs. 

convert[0] = V; 
convert[l] = Y; 
convert[2] = f c f ; 
convert[3] = 'g'; 

total_probes = 0; 

pass^c - 0; // number of probes pass GC test. 
for(MM[0] = 0; MM[0] < 4; MM[0]++) 
for(MM[l] - 0; MM[1] < 4; MM[1]++) 
for(MM[2] = 0; MM[2] < 4; MM[2]++) 
for(MM[3] = 0; MM[3] < 4; MM[3]++) 

for(MM[4] = 0; MM[4] < 4; MM[4]-H-) //*things to pay attention to. 
for(MM[5] = 0; MM[5] < 4; MM[5]++) //*things to pay attention to. 
for(MM[6] = 0; MM[6] < 4; MM[6]++) //^things to pay attention to. 
for(MM[7] = 0; MM[7] < 4; MM[7]++) //^things to pay attention to. 
{ 

total_probes++; 
gesum = 0; 



// build a probe. 




forQj = 0; jj < LEN_MER; jj++) 
{ 

strQj] = convert [MM [jj]]; 
ifCstrQj] = 'c* j| str[jj] = 'g') 
gcsum++; 

} 

str[LEN_MER] = W; 

// check its GC contents and secondary structure. 

if(gcsum == NUM_GC) 

{ 

pass _gc-H-; 

fprintf(rplog, "pass GCtest: %s\n", str); 
if[ ! Seconds truct(str)) 

r 

strcpy(probe[cnt_prb], str); 
if(++cnt_prb = max_prb) 
{ 

// should relocate memory. 

// To simplefy the program, let's just give an error msg. 

fprintf(stderr, "ERROR: Probe array is too small. cnt_prb is %d\n", cnt_prb); 

exit(l); 

} 

} 

else 
{ 

// record the rejected string 
strcpy(sndstr[cnt_snd], str); 
if(++cnt_snd = max_snd) 

{ 

fprintf(stderr, "ERROR: Secondary Structure array is too small. cnt_snd «= %d\n", 

cnt_snd); 

exit(l); 

} 

} 

} 

} 



fprintfi(fp, "\n%d mer probe selection\n", LEN_MER); 
fprintf(fp, "Number of GCs in the probes: %d\n", NUM_GC); 
fprintf(fp, "Score to reject as secondary structure: %d\n", 

S C ORE_NEEDED_IN_2ND) ; 
fprintf(fp, "Score to reject as incompatible: %d\n", CROSSHYB_CUTOFF); 
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fprintf(fp, "Compatible test overlap: %d\n", CROSSHYBJ3VERLAP); 
fprintf(fi), "Additional equivalent base-pair in compatibility checking: %c%c\n", 
eql,eq2); 

fprintf(fp, "Simple match cutoff value(including): %d\n\ SIMPLE_CUTOFF); 
fprintf(fp, "First N value(including): %d\n", FIRST_N); 
fprintf(fp, "Additional penalty for G or C: %d\n", GC^ADDITION); 
fprintf(fp, "\n\n"); 

fprintf(fp, "Total possible %d mers: %d\n", LEN_MER, total_probes); 

fprintf(fp, "Number passed GC_test : %d\n", pass _gc); 

f^rintf(f^, "Number passed secondary structure test : %d\n", cnt_prb); 
// for(ii - 0; ii < cnt_snd; ii++) 
// fprintf(fp, "%s\n", sndstrfii]); 

// From the set (call it setl) of probes which passed GC and 2nd structure 
// tests, choose a probe into the final set(set2). Then compare this 
// probe against all the probes left in setl and throught out the ones 
// that may crosshyb to this probe. From what's left in setl, choose 
// another probe and compary it to the rest of setl... 

compatible = new char [cnt_prb]; 
for(ii = 0; ii < cnt_prb; ii++) 
{ 

compatible[ii] = T; 

} 

// Compatibility check #1: Use weighted scores to penalize neighboring matches. 
// first_match_score = 1 ; 

// if prev pair is a match, current_match_score = prev_match_score*2. 
ii = 0; 

failed_ch = 0; 
while(ii < cnt_prb) 



if(compatible[jj] = T && 
(s=CrossHyb(probe[ii],probe[ij],CROSSHYB^OVERLAP)) >= CROSSHYB_CUTOFF) 

{ 

compatibleQj] = 'F; 
failed_ch++; 

fprintf(fplog, "Rejected(%d) %s in slide test for %s\n", 
s, probeQj], probe[ii]); 



{ 



for(jj = ii+1 ; jj < cnt_prb; 
{ 



} 
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} 



ii++; 

while(ii < cnt_prb && compatiblepi] — 'F) 
ii++; 

} 

fprintfitfp, "Number of probes passed compatibility test: %d\n*\ 
cnt_prb - failed_ch); 

// Compatibility check #2: Use unweighted score: count unconsecutive matches 

// find the first 'passed 1 probe. 
ii = 0; 

while(ii < cnt jprb && compatiblepi] — T) 

ii++; 
failed_sm = 0; 
while(ii < cnt_prb) 
{ 

forQj = ii+1 ; jj < cnt_prb; 

{ 

ifi[compatibleQj] — T && 
(s=SimpleMatch(probe[ii],probe[jj])) >= SIMPLE_CUTOFF) 

{ 

compatibleQj] — T ! ; 

fprintf([fplog, "Rejected(%d) %s in simple_match test for %s\n", 

s, probeQj], probe[ii]); 
failed__sm++; 

} 

} 



Mm ' ' f 

while(ii < cnt _prb Sc& compatiblepi] = T) 
ii++; 

} 

fprintf(fp, "Number of probes passed simple match test: %d\n", 
cnt_prb - failed_ch - failed_sm); 



// Compatibility check #3: if the first N bases match ANYWHERE in another probe. 

// find the first 'passed* probe, 
ii = 0; 

while(ii < cnt_prb && compatiblepi] = *F) 

ii++; 
failed_fii = 0; 

-*<«u-9 F|G _ 16Q 



• 



{ 

forQj = ii+1 ; jj < cnt_prb; 
{ 

i£(compatibleQj] = T && 
FirstN(probe[ii], probeQj], FIRST_N) = T) 

{ 

cxrnipatibleQj^'F'; 
failed_fii++; 

fprintf(fplog, "Rejected %s in FIRSTN test for %s\n", 
probeQj], probe|ii]); 



ii++; 

while(ii < cnt_prb && compatible[ii] = TP) 
ii-H-; 

} 

fprintfi(fp, "Number of probes passed FIRSTN compatibility test: %d\n", 
cnt _prb - failed_ch - failed_sm - failed_fh); 

// output. 
JJ = 0; 

fprintf(fp, "\nSelected probes are: \n M ); 
for(ii = 0; ii < cnt_prb; ii-H-) 



// Check if 'str* contains a secondary structure. That is, if there is a 
// consecutive 3 bases that matches when 'str* is folded. 
// return 1 if found secondary structure, 0 otherwise. 

int SecondStruct(const char *str) 



} 



} 



if(compatible[ii] = T) 
{ 

fprintf(fp, "%s \n M , probe[ii]); 

} 



intii,jj,kk, 11; 
int sum, score[32]; 
char prev_match; 
char *compl; 
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char complement[256]; 
complementf'a'] = f ; 
complement^] = 'a'; 
complementf c'] = 'g'; 
complementfg'] = 'c'; 

11 = strlen(str); 

compl = new char pi+1]; 

for(ii = 0;ii<ll;ii-H-) 

{ 

compl [ii] = complement[str[ii]]; 

} 

for(ii = MATCH_NEEDED_IN_2ND; ii < 11 - MATCH_NEEDED_IN_2ND; ii++) 
{ 

prevmatch = TP; 
sum = 0; 

forQj=0;jj<ii;jj++) 
{ 

score [jj] = 0; 
kk = ii*2-jj; 
if(kk < 11) 

{ 

if(str[jj] — compl [kk]) 
{ 

if(prev_match = T) 
{ 

score[jj] = scoreftj-1] * 2; 

} 

else 
{ 

score [jj] = 1; 
prev_match = T; 

} 

} 

else 
{ 

prev_match = 'F; 

} 

} 

sum += score [jj]; 

} 

// fprintf(stderr, "2* sum = %d\n", sum); 
if(sum >= SCORE_NEEDED_IN_2ND) 
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{ 

delete [] compl; 

return 1 ; // Found a 2nd structure. 

} 

for(ii = MATCH_NEEDED_IN_2ND - 1; ii < 11 - MATC H_NEEDED_IN_2ND ; ii++) 
{ 

prev_match = 'P; 
sum = 0; 

for(ij = 0;jj<=ii;ij-H-) 
{ 

score[jj] = 0; 
kk = ii*2+l -jj; 
if(kk<ll) 
{ 

if(strQj] = compl [kk]) 
{ 

if(prev_match = T) 
{ 

score[jj] ■ scoreQj- 1]*2; 

} 

else 

{ 

scoreQj] = 1; 
prev_match = T; 

} 

} 

else 
{ 

prev_match = 'F'; 

} 

} 

sum += scoreQj]; 

} 

// fprintf(stderr, "T sum = %d\n", sum); 
if(sum >= SCORE_NEEDED_IN_2ND) 

{ 

delete Q compl; 

return 1 ; // Found a 2nd structure. 

} 

} 

delete Q compl; 
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return 0; // No 2nd structure. 

} 

// check if strl and str2 can hybridizy together. 
// return the max of match scores. 
// Assume strlen(strl) = strlen(str2). 

int CrossHyb(const char *strl, const char *str2, int overlap) 



int ii, jj, len, sum, score, prev score, max_sum, numGC; 
char prev_match; 

len = strlen(strl); 
max_sum = 0; 

fprintf(fplog, "Sliding test between %s and %s\n", strl, str2); 

for(ii = overlap-len; ii <= len-overlap; ii++) 
{ 

numGC = 0; 
sum = 0; 

score = prev_score = 0; 
prevjnatch = 'F; 
fprintf(fplog, "Compare "); 
for(jj » ii; jj < len && jj - ii < len; 
{ 

if(jj >=0&&jj-ii >= 0) 



if(((strlQj]|32) = 'g' && (str2[jj-ii]|32) — 'g') || 
((strl[u]|32) = V && (str2Qj-ii]|32) = 'c')) 
numGC++; 

if(prev_match = T) 
{ 

Score = prev_score*2; 



fprintf(fplog, "(%c,%c) strlQj], str2[jj-ii]); 
if((strlQj] = str2[jj-ii]) || 
(strl Qj] — eql && str2Qj-ii] — eq2) || 
(strl [jj] = eq2 && str2[jj-ii] = eql)) 

{ 



} 

else 
{ 



score 
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prev_match = T; 

} 

} 

else 

{ 

score = 0; 
prev_match = 'F; 

} 

sum += score; 
prev_score = score; 

} 

} 

fprintf(fplog, M Score=%d\n'\ sum + numGC*GC_ADDITION); 

if(sum + numGC*GC_ADDITION > max_sum) 
max sum = sum + numGC*GC_ADDIT10N; 

} 

fprintf(fplog, "Max score is %d\n", max_sum); 
return max_sum; 

// Compare 2 strings base to base, 0 to 0, 1 to 1..., no sliding. 

// return number of matches. 

// Assume strlen(strl) = strlen(str2). 

int SimpleMatch(const char *strl, const char # str2) 
{ 

int ii, sum; 
sum = 0; 

for(ii = 0; ii < strlen(strl); ii++) 
{ 

if((strl[ii] = str2[ii]) || 
(strl [ii] = eql && str2[ii] = eq2) || 
(strl [ii] = eq2 && str2[ii] = eql)) 

{ 

sum++; 

} 

} 

return sum; 
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// Check if the first N bases of the two probes are identical, 
char FirstN(const char *strl, const char *str2, int N) 
{ 

int ii; 

char match = T; 

if(N>strlen(strl)) 
return 'F; 

for(ii = 0; ii<N; ii++) 
{ 

if(!((strl[ii] = str2[ii]) || 
(strl [ii] = eql && str2[ii] = eq2) || 
(strl [ii] = eq2 && str2[ii] = eql))) 

{ 

match = T; 
break; 

} 

} 

return match; 

i 
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Combine resin supports, 
Mix and aliquot into new 
synthesis columns 



AA 




GG 




CC 




TT 


GA 




AG 




AC 




AT 


CA 




CG 
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Synthesize the nucleotide A on a column 
and in parallel synthesize G, C and T 
each on their own oligo synthesis 
columns. 



After the second round of synthesis there are a
total of 16 di-nucleotide sequences. If
columns were combined, mixed, aliquoted,
and followed by a third round of synthesis, a
total of 64 different tri-nucleotides would be
generated.
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RNA negative 
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Starting Perturbagen Library in
cells grown in dextrose (non-
expressing conditions)




Introduce ID Tagged Perturbagen library into 
cells and express the perturbagen 




Each cell expresses a
different perturbagen.
Most have no effect...
some kill



1 



Reisolate plasmid library after passage from induced 
and non induced grown cells. Keep libraries separate. 




T7 transcribe ID Tags in passaged 
"Dextrose" library and "Galactose" Library 
to generate mg quantities of ID Tags as 
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FITC label Starting 
Library RNA 



1 



Rhodamine label 
Passaged Library 
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Sequence and clone Perturbagen 
DNA back into expression construct 
and validate effect 



t 



CID- 



Pert 



Use specific ID-Tag primer and a 
common vector primer juxtaposed 
to perturbagen to amplify 
corresponding perturbagen 



t 



Sequence ID Tag and 
generate complement oligo 
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RT-PCR amplify RNA off beads 
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Sort single FITC+ beads
into 96 well PCR Plate



FITC 



Combine and hybridize RNA from 
libraries to ID Tag Beads 



Sort all Beads on Cell sorter to identify Beads 
labeled only with FITC+ RNA 



